Use the Lord of the Rings data emailed to you to answer the following questions. Note that these data are from Jenny Bryan (jennybc) and represent the number of words spoken by characters in the LOTR trilogy. Some other, pretty amazing visualizations of these data, the work of Nadieh Bremer, can be seen here. You are merely looking at how many times a particular race or character appears on screen with a dialogue of at least one word.
# Race ----
library(ggplot2)

# NOTE(review): `lotr` is not created anywhere in the visible script; it is
# assumed to have been loaded already, with columns Film, Race, Character
# and Words -- confirm before running.

# Caption shared by every Lord of the Rings plot below.
lotr_caption <- "Source: Jenny Bryan's Lord of the Rings dataset"

# Bar chart of Race. geom_bar() counts the rows for each Race, so Race sits on
# the x-axis and the count on the y-axis; the axis labels must follow that
# mapping (they were previously swapped).
ggplot(lotr, aes(x = Race)) +
  geom_bar() +
  labs(
    x = "Race",
    y = "Frequency",
    title = "Distribution of Race in The Lord of the Rings",
    caption = lotr_caption
  )

# Facet by Film to see how Race is distributed across Film.
ggplot(lotr, aes(x = Race)) +
  geom_bar() +
  facet_wrap(~Film) +
  labs(
    x = "Race",
    y = "Frequency",
    title = "Distribution of Race in The Lord of the Rings",
    subtitle = "(by Film)",
    caption = lotr_caption
  )

# Alternatively, you could do this: one dodged bar per Film within each Race.
ggplot(lotr, aes(x = Race, group = Film, fill = Film)) +
  geom_bar(position = "dodge") +
  labs(
    x = "Race",
    y = "Frequency",
    title = "Distribution of Race in The Lord of the Rings",
    subtitle = "(by Film)",
    caption = lotr_caption
  )

# Character by film. Use coord_flip() to flip the coordinates so that the
# characters show up on the y-axis.
# labs() names aesthetics rather than screen positions, so x = "Character"
# still labels the Character axis after the flip.
ggplot(lotr, aes(x = Character, group = Film, fill = Film)) +
  geom_bar(position = "dodge") +
  coord_flip() +
  labs(
    x = "Character",
    y = "Frequency",
    title = "Distribution of Characters in The Lord of the Rings",
    subtitle = "(by Film)",
    caption = lotr_caption
  )

# Use facet_wrap() to generate the three-panel layout, one panel per film.
# The legend is dropped because each panel already identifies its Film.
ggplot(lotr, aes(x = Character, group = Film, fill = Film)) +
  geom_bar(position = "dodge") +
  coord_flip() +
  facet_wrap(~Film) +
  theme(legend.position = "none") +
  labs(
    x = "Character",
    y = "Frequency",
    title = "Distribution of Characters in The Lord of the Rings",
    subtitle = "(by Film)",
    caption = lotr_caption
  )

# Histogram of Words.
# bins = 30 is the value geom_histogram() falls back to when none is given;
# stating it keeps the same bins and avoids the "pick better value" message.
ggplot(lotr, aes(x = Words)) +
  geom_histogram(bins = 30, fill = "cornflowerblue") +
  labs(
    x = "Words",
    y = "Frequency",
    title = "Distribution of Words Spoken in The Lord of the Rings",
    caption = lotr_caption
  )

# Histogram of Words, one panel per Film.
ggplot(lotr, aes(x = Words)) +
  geom_histogram(bins = 30, fill = "cornflowerblue") +
  facet_wrap(~Film) +
  labs(
    x = "Words",
    y = "Frequency",
    title = "Distribution of Words Spoken in The Lord of the Rings",
    subtitle = "(by Film)",
    caption = lotr_caption
  )

# Histogram of Words, one panel per Race.
ggplot(lotr, aes(x = Words)) +
  geom_histogram(bins = 30, fill = "cornflowerblue") +
  facet_wrap(~Race) +
  labs(
    x = "Words",
    y = "Frequency",
    title = "Distribution of Words Spoken in The Lord of the Rings",
    subtitle = "(by Race)",
    caption = lotr_caption
  )

# Download the monthly Great Lakes water level dataset (SPSS format from here
# and Excel format from here). Note that water level is in meters.
Use the following commands to read in the Excel file:
library(readxl)

# Download the workbook to a local file, then read that local copy.
url <- "https://aniruhil.github.io/avsr/teaching/dataviz/greatlakes.xlsx"
destfile <- "greatlakes.xlsx"
curl::curl_download(url, destfile)

# Column types are given explicitly: one date column followed by five
# numeric columns.
greatlakes <- read_excel(
  destfile,
  col_types = c("date", rep("numeric", 5))
)

# Now use an appropriate chart to show the water level for Lake Superior.
# Line chart of the Superior column against monthyear.
ggplot(greatlakes, aes(x = monthyear, y = Superior)) +
  geom_line() +
  labs(
    x = "Year",
    y = "Water level (in meters)",
    title = "Water Level in Lake Superior",
    caption = "Source: MPA 5830"
  )

# Download the 2017 County Health Rankings data (SPSS format from here, Excel
# format from here) and the accompanying codebook.
Construct appropriate plots that show the relationship between the following pairs of variables:
library(readxl)

# NOTE(review): this is an absolute path under the author's home directory;
# adjust it to wherever the workbook lives on your machine.
chr <- read_excel(
  "~/Documents/Teaching/mpa5830/data/CountyHealthRankings2017.xlsx"
)

# Adult obesity vs. high school graduation rate.
ggplot(chr, aes(x = Adult_obesity, y = High_school_graduation)) +
  geom_point() +
  labs(
    x = "Adult Obesity",
    y = "High School Graduation Rate",
    title = "Scatterplot of Adult Obesity and High School Graduation Rate",
    caption = "Source: County Health Rankings (2017)"
  )

# Children in poverty vs. high school graduation rate.
ggplot(chr, aes(x = Children_in_poverty, y = High_school_graduation)) +
  geom_point() +
  labs(
    x = "Percent of Children in Poverty",
    y = "High School Graduation Rate",
    title = "Scatterplot of Child Poverty and High School Graduation Rate",
    caption = "Source: County Health Rankings (2017)"
  )

# Preventable hospital stays vs. unemployment rate.
ggplot(chr, aes(x = Preventable_hospital_stays, y = Unemployment_rate)) +
  geom_point() +
  labs(
    x = "Preventable hospital stays",
    y = "Unemployment Rate",
    title = "Scatterplot of Preventable Hospital Stays and Unemployment Rate",
    caption = "Source: County Health Rankings (2017)"
  )

# Use the unemployment data given to you and construct appropriate plots that
# show the distribution of unemployment rates across years for each of the
# four educational attainment groups.
# load() restores the objects saved in the .RData file; the plot below uses
# `urate`, which is presumably one of them -- confirm.
load("~/Documents/Teaching/mpa5830/data/unemprate.RData")

# One line per educational attainment group, coloured by group.
# color = "" blanks the legend title.
# NOTE(review): the caption cites County Health Rankings, but this plot uses
# the unemployment data -- looks like a copy-paste; confirm the real source.
ggplot(
  urate,
  aes(x = yearmonth, y = rate, group = educ_group, color = educ_group)
) +
  geom_line() +
  theme(legend.position = "bottom") +
  labs(
    x = "Year",
    y = "Unemployment Rate",
    title = "Unemployment Rate",
    subtitle = "(by Educational Attainment and Year)",
    caption = "Source: County Health Rankings (2017)",
    color = ""
  )

# See if (a) you can pretty up the labels for educational attainment (e.g.,
# anything like BA_or_more is ugly), and (b) order the groups such that
# educational attainment shows up in the plot legend as the ordinal variable
# that it is. Then regenerate the plot.
library(dplyr)

# Build a readable, ordered version of educ_group:
#   1. replace underscores with spaces;
#   2. convert to an ordered factor running from least to most education.
# NOTE(review): any cleaned label that does not exactly match one of the four
# levels below becomes NA -- verify against the values in educ_group.
urate <- urate %>%
  mutate(
    edgroup = gsub("_", " ", educ_group),
    edgroup = factor(
      edgroup,
      levels = c(
        "Less than High School",
        "High School",
        "Some College",
        "BA or more"
      ),
      ordered = TRUE
    )
  )

# Same line plot as before, now grouped and coloured by the ordered factor.
# scale_color_discrete() is kept deliberately: ggplot2 would otherwise pick
# its ordinal (viridis) palette for an ordered factor.
ggplot(
  urate,
  aes(x = yearmonth, y = rate, group = edgroup, color = edgroup)
) +
  geom_line() +
  scale_color_discrete() +
  theme(legend.position = "bottom") +
  labs(
    x = "Year",
    y = "Unemployment Rate",
    title = "Unemployment Rate",
    subtitle = "(by Educational Attainment and Year)",
    caption = "Source: County Health Rankings (2017)",
    color = ""
  )